/* 
THIS DOFILE PRODUCES SEVERAL TABLES AND FIGURES USING EMPLOYER X AGENCY LEVEL DATASET FROM


"Regulation by Reputation? Intermediaries, Labor Abuses, and International Migration" 
- A. Nilesh Fernando (University of Notre Dame); corresponding author (nilesh.fernando@nd.edu)  
- Niharika Singh (University of Notre Dame) 


These include Figure 3, Appendix A15, Appendix A16, Appendix A17, Appendix B11, and 
Appendix B8

See readme.docx for details 

*/


* Start from an empty session and let output scroll without pausing
clear all
set more off
set matsize 1000

* From this point on, commands and star comments end at a semicolon
#delimit;

* Work from the replication do folder so the relative paths used below resolve;
cd "~/Desktop/migrec_replication/do/";

* Load the employer-by-agency panel used by the sections that follow;
use "../dta_secure/employer_agency_panel.dta", clear;


*-----------------------------------------------------------------------------*
* FIGURE 3: THE DISTRIBUTION OF MATCHES TO GOOD EMPLOYERS BY AGENCY QUALITY   *
*-----------------------------------------------------------------------------;

* The sample cut below is temporary. The data in memory are put back by the
  restore that closes the Figure 3 section;
preserve;

* Drop observations whose performance rate is below 26.9;
keep if performance_rate >= 26.9;


* Panel A: Pre-Program and Year 1 of RI;
* Series 1 is a long-dashed density line for years before 2010.
  Series 2 is the 2010 density, recast as small triangle markers.
  Both use observations with elig equal to 1 and good_employer_pre2009 equal to 1.
  The legend and the x axis are switched off in this panel;

twoway
    (kdensity performance_rate
        if elig == 1 & year < 2010 & good_employer_pre2009 == 1,
        width(2) color(gs5) lpattern(longdash))
    (kdensity performance_rate
        if elig == 1 & year == 2010 & good_employer_pre2009 == 1,
        recast(scatter) msymbol(triangle) msize(vsmall)
        mlcolor(black%30) mlwidth(vthin) mfcolor(gs9)
        width(2) lpattern(dash)),
    legend(off)
    xscale(off)
    ytitle("Density")
    name(good1c, replace)
    ylabel(, nogrid)
    xlabel(, nogrid)
    xline(30, lpattern(longdash) lcolor(black%8))
    xline(40, lpattern(longdash) lcolor(black%8))
    xline(50, lpattern(longdash) lcolor(black%8))
    xline(60, lpattern(longdash) lcolor(black%8))
    graphregion(color(white))
    caption("Panel A: Pre-Program and Year 1 of RI", size(small) position(2) ring(0));


graph export "../out/good_pre_10_cut.pdf", replace;


** Panel B: Pre-Program, Yr. 1, and Yr. 2 of RI;
* Three series for observations with elig equal to 1 and good_employer_pre2009
  equal to 1. Series 1 is the pre-2010 density as a long-dashed line. Series 2
  and 3 are the 2010 and 2011 densities recast as small triangles, with the 2011
  markers outlined in solid black. Legend and x axis are switched off;

twoway
    (kdensity performance_rate
        if elig == 1 & year < 2010 & good_employer_pre2009 == 1,
        width(2) color(gs5) lpattern(longdash))
    (kdensity performance_rate
        if elig == 1 & year == 2010 & good_employer_pre2009 == 1,
        recast(scatter) msymbol(triangle) msize(vsmall)
        mlcolor(black%30) mlwidth(vthin) mfcolor(gs9)
        width(2) lpattern(dash))
    (kdensity performance_rate
        if elig == 1 & year == 2011 & good_employer_pre2009 == 1,
        recast(scatter) msymbol(triangle) msize(vsmall)
        mlcolor(black) mlwidth(vthin) mfcolor(gs9)
        width(2) lpattern(dash)),
    legend(off)
    xscale(off)
    ytitle("Density")
    name(good2c, replace)
    ylabel(, nogrid)
    xline(30, lpattern(longdash) lcolor(black%8))
    xline(40, lpattern(longdash) lcolor(black%8))
    xline(50, lpattern(longdash) lcolor(black%8))
    xline(60, lpattern(longdash) lcolor(black%8))
    graphregion(color(white))
    caption("Panel B: Pre-Program, Yr 1, & Yr 2 of RI", size(small) position(2) ring(0));


graph export "../out/good_pre_11_cut.pdf", replace;


** Panel C: All Years;
* Seven series, all for observations with elig equal to 1 and
  good_employer_pre2009 equal to 1.
  Series 1 is the pre-2010 density as a long-dashed line.
  Series 2 and 3 are the 2010 and 2011 densities recast as small triangles.
  Series 4 to 7 are the 2012 to 2015 densities as lines of increasing opacity.
  Each year is overlaid exactly once, so the legend keys run from 1 to 7 with no
  gap, and each reference line at 30, 40, 50 and 60 is drawn once;

twoway
    (kdensity performance_rate
        if elig == 1 & year < 2010 & good_employer_pre2009 == 1,
        width(2) color(gs5) lpattern(longdash))
    (kdensity performance_rate
        if elig == 1 & year == 2010 & good_employer_pre2009 == 1,
        recast(scatter) msymbol(triangle) msize(vsmall)
        mlcolor(black%30) mlwidth(vthin) mfcolor(gs9)
        width(2) lpattern(dash))
    (kdensity performance_rate
        if elig == 1 & year == 2011 & good_employer_pre2009 == 1,
        recast(scatter) msymbol(triangle) msize(vsmall)
        mlcolor(black) mlwidth(vthin) mfcolor(gs9)
        width(2) lpattern(dash))
    (kdensity performance_rate
        if elig == 1 & year == 2012 & good_employer_pre2009 == 1,
        width(2) color(black%5))
    (kdensity performance_rate
        if elig == 1 & year == 2013 & good_employer_pre2009 == 1,
        width(2) color(black%20))
    (kdensity performance_rate
        if elig == 1 & year == 2014 & good_employer_pre2009 == 1,
        width(2) color(black%50))
    (kdensity performance_rate
        if elig == 1 & year == 2015 & good_employer_pre2009 == 1,
        width(2) color(black)),
    legend(order(1 2 3 4 5 6 7)
        label(1 "2005-09") label(2 "2010") label(3 "2011") label(4 "2012")
        label(5 "2013") label(6 "2014") label(7 "2015"))
    xtitle("Combined Score (2012 SLBFE Rating)")
    ytitle("Density")
    name(good3c, replace)
    ylabel(, nogrid)
    xline(30, lpattern(longdash) lcolor(black%8))
    xline(40, lpattern(longdash) lcolor(black%8))
    xline(50, lpattern(longdash) lcolor(black%8))
    xline(60, lpattern(longdash) lcolor(black%8))
    graphregion(color(white))
    caption("Panel C: All Years", size(small) position(2) ring(0));


graph export "../out/good_all_cut.pdf", replace;


** Combine output into single figure ;

* grc1leg2 is a user-written command and this is its first use in the file.
  Check that it is available here and try to install it if it is not, so a
  fresh machine does not stop at this point;
cap which grc1leg2;
if _rc != 0 {;
    cap ssc install grc1leg2;
};

* Stack the three named panels in one column on a common x axis and reuse the
  legend of the all-years panel for the combined figure;
grc1leg2 good1c good2c good3c, graphregion(color(white)) plotregion(color(white))
 cols(1) xcommon imargin(b=1 t=1) legendfrom(good3c) position(2) lcols(1) lms(small)
  name(matches_combo, replace);

graph export "../out/matches_combo_v3.pdf", replace;

* Undo the performance rate cut made at the top of the Figure 3 section;
restore;

*------------------------------------------------------*
* APPENDIX A15: PRE-PROGRAM EMPLOYER TO AGENCY MATCHES *
*------------------------------------------------------;

* Two densities of performance_rate for years before 2010 with elig equal to 1.
  Series 1 has good_employer_pre2009 equal to 1 and series 2 has it equal to 0.
  This runs after the Figure 3 restore, so the 26.9 cutoff is not applied here;

twoway
    (kdensity performance_rate
        if elig == 1 & year < 2010 & good_employer_pre2009 == 1,
        width(2) color(red%50))
    (kdensity performance_rate
        if elig == 1 & year < 2010 & good_employer_pre2009 == 0,
        width(2) color(blue%50)),
    legend(order(1 2) label(1 "Good Employers") label(2 "Bad Employers"))
    xtitle("2012 Combined Score/Agency Rating")
    ytitle("Density");

graph export "../out/good_bad_pre.pdf", replace;



*----------------------------------------------------------------*
* APPENDIX A16: WAGES AND AMENITIES BY EMPLOYER COMPLAINT RATE   *
*----------------------------------------------------------------;

* binscatter is a user-written command from SSC and this is its first use in
  the file. Try to install it when it is not already available;
cap which binscatter;
if _rc != 0 {;
    cap ssc install binscatter;
};

* Everything up to the matching restore works on a collapsed copy of the data;
preserve;

* Divide each category count by num_vac_req to obtain a share variable;
foreach x of varlist  num_vac_req_skilltype2 num_vac_req_construction
 num_vac_req_domestic num_vac_req_retail {;

    gen `x'_share = `x'/num_vac_req ;

};

* The skilltype2 share is carried forward under the name drivers;
rename num_vac_req_skilltype2_share num_vac_req_drivers_share;

** generate bins for complaint rate;
* binscatter is called here for its genxq option, which stores the 30 quantile
  bins of emp_comp_rate_pre2009 in complaint_bins. The plot it draws is not
  exported;

binscatter usd_salary_win emp_comp_rate_pre2009 if year < 2010 & migrants_pre2009 >= 50
 & emp_comp_rate_pre2009 < 0.15 ,    nquantiles(30) line(none) genxq(complaint_bins);

* restrict sample to pre-program data with at least 50 migrants and a complaint
  rate below 15 percent, the same condition used for the bins above;
keep if year < 2010 & migrants_pre2009 >= 50  & emp_comp_rate_pre2009 < 0.15;

* Reduce to one row per complaint bin, with means of the rates, shares and
  salary and sums of the migrant and occupation counts;
collapse (mean) fill_rate  num_vac_req_drivers_share num_vac_req_construction_share
  num_vac_req_domestic_share num_vac_req_retail_share mean_vac_req emp_comp_rate_pre2009
  usd_salary_win num_vac_req_construction num_vac_req_domestic num_vac_req_retail
   ticket_share_vac food_share_vac accom_share_vac med_share_vac ticket_share_jo
   food_share_jo accom_share_jo med_share_jo domestic_share (sum) migrant a1_professional
    a2_mid_clerical a3_skilled a4_semiskilled a5_unskilled a6_dw, by(complaint_bins);

* Panel A: All Employers;
* Plotted on the data collapsed to one row per complaint bin.
  Salary is on the first y axis and is limited to bins with salary of at most
  275 and complaint rate of at most 0.125. The two amenity shares and the
  histogram of the complaint rate are on the second y axis and use all bins;

twoway
    (scatter usd_salary_win emp_comp_rate_pre2009
        if usd_salary_win <= 275 & emp_comp_rate_pre2009 <= 0.125,
        yaxis(1) ytitle("Avg. Salary (USD)", axis(1)))
    (scatter med_share_jo emp_comp_rate_pre2009,
        yaxis(2) ytitle("Share with Amenity", axis(2)))
    (scatter ticket_share_vac emp_comp_rate_pre2009,
        yaxis(2))
    (histogram emp_comp_rate_pre2009,
        fraction yaxis(2) bin(10) color(gs10%50)),
    xtitle("Employer pre-program complaint rate")
    legend(order(1 2 3 4 5 6) rows(2)
        label(1 "Salary") label(2 "Health Insurance") label(3 "Return Airfare"));


graph export "../out/salaries_amenities_v2_all.pdf", replace;

* Panel B: Employers and Agencies Specializing in Domestic Work;
* Same layout as the all-employers panel, with the salary series further
  limited to bins where domestic_share exceeds 0.5.
  NOTE(review): the domestic_share condition is applied to the salary series
  only. The two amenity series and the histogram still use every bin. Confirm
  that this is intended;

twoway
    (scatter usd_salary_win emp_comp_rate_pre2009
        if usd_salary_win <= 275 & emp_comp_rate_pre2009 <= 0.125
            & domestic_share > 0.5,
        yaxis(1) ytitle("Avg. Salary (USD)", axis(1)))
    (scatter med_share_jo emp_comp_rate_pre2009,
        yaxis(2) ytitle("Share with Amenity", axis(2)))
    (scatter ticket_share_vac emp_comp_rate_pre2009,
        yaxis(2))
    (histogram emp_comp_rate_pre2009,
        fraction yaxis(2) bin(10) color(gs10%50)),
    xtitle("Employer pre-program complaint rate")
    legend(order(1 2 3 4 5 6) rows(2)
        label(1 "Salary") label(2 "Health Insurance") label(3 "Return Airfare"));

graph export "../out/salaries_amenities_v2_dw.pdf", replace;

* Bring back the uncollapsed data that were set aside before the collapse;
restore;


*-----------------------------------------------------------*
* APPENDIX A17: MATCHES TO BAD EMPLOYERS BY AGENCY QUALITY **
*-----------------------------------------------------------;

* Temporary sample cut, undone by the restore that closes this section;
preserve;

* Drop observations whose performance rate is below 26.9;
keep if performance_rate >= 26.9;


* install command needed to combine graph output;
cap ssc install grc1leg2;


* Panel A: Pre-Program and Year 1 of RI;
* Two density lines for observations with elig equal to 1 and
  good_employer_pre2009 equal to 0. Series 1 covers years before 2010 and
  series 2 covers 2010. The legend lists seven keys although this panel draws
  two series. The x axis is switched off;

twoway
    (kdensity performance_rate
        if elig == 1 & year < 2010 & good_employer_pre2009 == 0,
        width(2) color(red%70))
    (kdensity performance_rate
        if elig == 1 & year == 2010 & good_employer_pre2009 == 0,
        width(2) color(blue%50)),
    legend(position(2) order(1 2 3 4 5 6 7)
        label(1 "2005-09") label(2 "2010") label(3 "2011") label(4 "2012")
        label(5 "2013") label(6 "2014") label(7 "2015"))
    xscale(off)
    ytitle("Density")
    name(bad1c, replace)
    caption("Panel A: Pre-Program and Year 1 of RI", size(small) position(2) ring(0));

graph export "../out/bad_pre_10_cut.pdf", replace;

** Panel B: Pre-Program, Yr. 1, and Yr. 2 of RI;
* Three density lines for observations with elig equal to 1 and
  good_employer_pre2009 equal to 0, covering years before 2010, then 2010, then
  2011. The legend lists seven keys although this panel draws three series.
  The x axis is switched off;

twoway
    (kdensity performance_rate
        if elig == 1 & year < 2010 & good_employer_pre2009 == 0,
        width(2) color(red%70))
    (kdensity performance_rate
        if elig == 1 & year == 2010 & good_employer_pre2009 == 0,
        width(2) color(blue%50))
    (kdensity performance_rate
        if elig == 1 & year == 2011 & good_employer_pre2009 == 0,
        width(2) color(blue%80)),
    legend(position(2) order(1 2 3 4 5 6 7)
        label(1 "2005-09") label(2 "2010") label(3 "2011") label(4 "2012")
        label(5 "2013") label(6 "2014") label(7 "2015"))
    xscale(off)
    ytitle("Density")
    name(bad2c, replace)
    caption("Panel B: Pre-Program, Yr 1, & Yr 2 of RI", size(small) position(2) ring(0));


graph export "../out/bad_pre_11_cut.pdf", replace;

** Panel C: All Years;
* Seven density lines for observations with elig equal to 1 and
  good_employer_pre2009 equal to 0. Series 1 pools years before 2010 and series
  2 to 7 are the single years 2010 to 2015, matching legend keys 1 to 7;

twoway
    (kdensity performance_rate
        if elig == 1 & year < 2010 & good_employer_pre2009 == 0,
        width(2) color(red%60))
    (kdensity performance_rate
        if elig == 1 & year == 2010 & good_employer_pre2009 == 0,
        width(2) color(blue%50))
    (kdensity performance_rate
        if elig == 1 & year == 2011 & good_employer_pre2009 == 0,
        width(2) color(blue%80))
    (kdensity performance_rate
        if elig == 1 & year == 2012 & good_employer_pre2009 == 0,
        width(2) color(green%50))
    (kdensity performance_rate
        if elig == 1 & year == 2013 & good_employer_pre2009 == 0,
        width(2) color(green%60))
    (kdensity performance_rate
        if elig == 1 & year == 2014 & good_employer_pre2009 == 0,
        width(2) color(green%70))
    (kdensity performance_rate
        if elig == 1 & year == 2015 & good_employer_pre2009 == 0,
        width(2) color(green%80)),
    legend(position(2) order(1 2 3 4 5 6 7)
        label(1 "2005-09") label(2 "2010") label(3 "2011") label(4 "2012")
        label(5 "2013") label(6 "2014") label(7 "2015"))
    xtitle("Combined Score (2012 SLBFE Rating)")
    ytitle("Density")
    name(bad3c, replace)
    caption("Panel C: All Years", size(small) position(2) ring(0));

graph export "../out/bad_all_cut.pdf", replace;

** Combine output into single figure ;
* Stack the three named panels in one column on a common x axis and take the
  shared legend from the all-years panel;

grc1leg2 bad1c bad2c bad3c,
    cols(1)
    xcommon
    imargin(b=1 t=1)
    legendfrom(bad3c)
    position(2)
    lcols(1)
    name(bad_matches, replace);

graph export "../out/bad_matches_v1.pdf", replace;

* Undo the performance rate cut made at the top of this section;
restore;

*----------------------------------------------*
* APPENDIX B8: SALARIES PAID BY BAD EMPLOYERS  *
*----------------------------------------------;

* Binned scatter of usd_salary_win_d against year from 2009 onward, for
  observations with good_employer_pre2009 equal to 0, drawn separately by
  elig_firm with no fitted line. Vertical lines mark 2010 and 2012;

binscatter usd_salary_win_d year
    if good_employer_pre2009 == 0 & year >= 2009,
    by(elig_firm)
    line(none)
    xline(2010, lcolor(blue))
    xline(2012, lcolor(green))
    legend(order(1 2) label(1 "Comparison") label(2 "Eligible"))
    ytitle("Avg. Salary (USD)");

graph export "../out/bad_employer_salaries_by_elig_status.pdf", replace;

*------------------------------------------------------*
* APPENDIX B11: K-S TESTS FOR EMPLOYER-AGENCY MATCHES *
*------------------------------------------------------;

* Define years to loop over;
local years 2010 2011 2012 2013 2014 2015;

* One indicator per program year, equal to 1 in that year and 0 otherwise.
  Each is used below as the two-group variable for ksmirnov;
foreach y of local years {;
    gen year_`y' = (year == `y');
};

* Set up temporary postfile with one row per year holding the K-S statistic
  and p-value for good and for bad employers;
tempname memhold;
tempfile results;
postfile `memhold' int year double d_good double p_good double d_bad double p_bad
using `results', replace;

* Loop over years. Each test compares the performance_rate distribution of the
  pooled pre-2010 years with that of one program year, among observations with
  elig equal to 1;
foreach y of local years { ;

    * Good employers;
    quietly ksmirnov performance_rate if elig == 1 & (year == `y' | year < 2010)
     & good_employer_pre2009 == 1, by(year_`y');
    local d_good = r(D);
    local p_good = r(p);

    * Bad employers are selected with good_employer_pre2009 equal to 0, the
      same definition used in Appendix A15, A17 and B8 of this file.
      NOTE(review): this line previously selected on bad_employer_pre2009
      equal to 1, a variable used nowhere else in the file. Confirm that the
      two definitions coincide in the data;
    quietly ksmirnov performance_rate if elig == 1 & (year == `y' | year < 2010)
     & good_employer_pre2009 == 0, by(year_`y');
    local d_bad = r(D);
    local p_bad = r(p);

    * Store results;
    post `memhold' (`y') (`d_good') (`p_good') (`d_bad') (`p_bad');
};

* Close and load. The posted results replace the panel in memory;
postclose `memhold';
use `results', clear;

* Format strings for LaTeX. Statistics are shown with four decimals and
  p-values with three. Each texrow is one complete table row;
gen str year_str = string(year) ;
gen D1 = string(round(d_good, 0.0001), "%9.4f") ;
gen P1 = string(round(p_good, 0.001), "%9.3f") ;
gen D2 = string(round(d_bad, 0.0001), "%9.4f") ;
gen P2 = string(round(p_bad, 0.001), "%9.3f") ;
gen texrow = year_str + " & " + D1 + " & " + P1 + " & " + D2 + " & " + P2 + " \\\\" ;

* Write LaTeX table body;
file open table using "../out/ks_output_table.tex", write replace ;
file write table "\begin{tabular}{lcc|cc}" _n ;
file write table "\toprule" _n ;
file write table " & \multicolumn{2}{c|}{Good Employers} & \multicolumn{2}{c}{Bad Employers} \\\\" _n ;
file write table "Year & D & \emph{p}-value & D & \emph{p}-value \\\\" _n ;
file write table "\midrule" _n ;

* Emit one line per result row. The row text is read with an explicit
  subscript so that observation i, and not the first observation, is written;
forvalues i = 1/`=_N' {;
    local thisrow = texrow[`i'] ;
    file write table "`thisrow'" _n ;
};

file write table "\bottomrule" _n ;
file write table "\end{tabular}" _n ;
file close table ;

